package com.m3958.encode.detector.impl;

import java.nio.ByteBuffer;
import java.nio.file.Path;

import com.m3958.encode.detector.AbstractDetector;
import com.m3958.encode.detector.Detector;
import com.m3958.encode.detector.LanguageName;

/**
 * Detector that scores input bytes against the Big5 double-byte code ranges.
 *
 * <p>A single byte is accepted when the inherited {@code isAscii} check passes; a two-byte
 * sequence is accepted when its big-endian 16-bit value falls inside one of the seven Big5
 * ranges declared below. The counters updated here ({@code successedNumber},
 * {@code asciiNumber}, {@code charNumber}, {@code failedNumber}) are not declared in this
 * class; they are presumably inherited from {@link AbstractDetector}.
 */
public class Big5 extends AbstractDetector implements Detector {

    public Big5() {
    }

    public Big5(byte[] bytes) {
        super(bytes);
    }

    public Big5(Path path) {
        super(path);
    }

    /*
     * Big5 code ranges (both ends inclusive):
     * 0x8140 to 0xa0fe Reserved for user-defined characters
     * 0xa140 to 0xa3bf "Graphical characters"
     * 0xa3c0 to 0xa3fe Reserved, not for user-defined characters
     * 0xa440 to 0xc67e Frequently used characters
     * 0xc6a1 to 0xc8fe Reserved for user-defined characters
     * 0xc940 to 0xf9d5 Less frequently used characters
     * 0xf9d6 to 0xfefe Reserved for user-defined characters
     * https://en.wikipedia.org/wiki/Big5
     * The original Big5 character set is sorted first by usage frequency, second by stroke count, lastly by Kangxi radical.
     *
     * The constants are stored as (signed) shorts, so every value here is negative when read
     * as a short; use inRange(...) below, which compares them as unsigned 16-bit values.
     */

    public static final short r1_start = (short) 0x8140;
    public static final short r1_end = (short) 0xa0fe;

    public static final short r2_start = (short) 0xa140;
    public static final short r2_end = (short) 0xa3bf;

    public static final short r3_start = (short) 0xa3c0;
    public static final short r3_end = (short) 0xa3fe;

    public static final short r4_start = (short) 0xa440;
    public static final short r4_end = (short) 0xc67e;

    public static final short r5_start = (short) 0xc6a1;
    public static final short r5_end = (short) 0xc8fe;

    public static final short r6_start = (short) 0xc940;
    public static final short r6_end = (short) 0xf9d5;

    public static final short r7_start = (short) 0xf9d6;
    public static final short r7_end = (short) 0xfefe;

    /**
     * @return 2, the widest character this detector examines, in bytes
     */
    @Override
    public int maxCharBytes() {
        return 2;
    }

    /**
     * Classifies one candidate character of one or two bytes and updates the counters.
     *
     * <ul>
     * <li>1 byte: counted as a success (and as ASCII) when {@code isAscii} accepts it and 0 is
     * returned; otherwise nothing is counted and 1 is returned.</li>
     * <li>2 bytes: counted as a success (and as a character) when the big-endian value lies in
     * any Big5 range, end points included; otherwise counted as a failure. Returns 0.</li>
     * <li>any other length: nothing is counted, returns 0.</li>
     * </ul>
     *
     * NOTE(review): the meaning of the return value is defined by the caller in
     * AbstractDetector, which is not visible here. Returning 1 for a non-ASCII single byte
     * presumably asks for one more byte to be supplied; confirm against the base class.
     *
     * @param bytes the bytes of the candidate character
     * @return 1 for a single non-ASCII byte, 0 in every other case
     */
    @Override
    protected int detectOne(byte... bytes) {
        switch (bytes.length) {
        case 1:
            if (isAscii(bytes[0])) {
                successedNumber++;
                asciiNumber++;
                return 0;
            }
            return 1;
        case 2:
            // ByteBuffer reads big-endian by default, so this is (lead << 8) | trail.
            // Masking converts the signed short into its unsigned 16-bit code value.
            int code = ByteBuffer.wrap(bytes).getShort() & 0xFFFF;
            if (isBig5Code(code)) {
                successedNumber++;
                charNumber++;
            } else {
                failedNumber++;
            }
            return 0;
        default:
            return 0;
        }
    }

    /**
     * Tells whether an unsigned 16-bit code lies in any of the seven Big5 ranges.
     * Only the combined 16-bit value is checked; the trail byte is not validated separately.
     */
    private static boolean isBig5Code(int code) {
        return inRange(code, r1_start, r1_end)
                || inRange(code, r2_start, r2_end)
                || inRange(code, r3_start, r3_end)
                || inRange(code, r4_start, r4_end)
                || inRange(code, r5_start, r5_end)
                || inRange(code, r6_start, r6_end)
                || inRange(code, r7_start, r7_end);
    }

    /**
     * Inclusive range test on unsigned 16-bit values. The upper bound is inclusive so that the
     * last code of each range (for example 0xa3bf or 0xf9d5) is accepted.
     */
    private static boolean inRange(int code, short start, short end) {
        return code >= (start & 0xFFFF) && code <= (end & 0xFFFF);
    }

    /**
     * @return the charset name reported by this detector, {@code "Big5"}
     */
    @Override
    protected String getCharsetName() {
        return "Big5";
    }

    /*
     * (non-Javadoc)
     * 
     * @see com.m3958.encode.detector.AbstractDetector#getLanguageName()
     */
    @Override
    protected LanguageName getLanguageName() {
        return LanguageName.TRADITIONAL_CHINESE;
    }

}
